# Load the Google Play Store data and drop unusable rows ----

# Install each dependency only when it is missing, so that re-running the
# script does not download the packages again every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# The workbook is read relative to the current working directory.
google <- read_excel("googleplaystore1.xlsx")

# Drop every row that has a missing value in any column.
google1 <- na.omit(google)

View(google1)

str(google1)
summary(google1)

# Treat the placeholder size "Varies with device" as missing, then drop the
# affected rows as well.
google2 <- google1
google2$Size[google2$Size == "Varies with device"] <- NA
google2 <- na.omit(google2)

# The former View(google3) call was removed: no object named google3 is
# created before this point, so the call stopped the script with an error.
View(google2)

# Overview of app types and categories ----

# Install ggplot2 only when it is missing.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Initial copies; both are replaced further down by the Free/Paid subsets.
price_free <- google2
price_paid <- google2

# Number of apps per type. Columns are referenced by bare name inside aes()
# so that ggplot2 looks them up in `data` (it warns against using `$` there).
ggplot(data = google2, aes(x = Type)) +
  geom_bar()
# More than 8000 apps are free.

class(google2$Category)
class(google2$Size)

# Ratings equal to "NaN" are treated as missing and those rows are dropped.
google2$Rating[google2$Rating == "NaN"] <- NA
google2 <- na.omit(google2)

# Grouping by Category; summarise() without arguments returns one row per
# category.
google_category <- google2 %>%
  group_by(Category)
summarise(google_category)
# NOTE(review): this tabulates the one-row-per-category summary, so every
# count is 1 - table(google2$Category) may have been intended; confirm.
table(summarise(google_category))

# Number of apps per category.
ggplot(data = google_category, aes(x = Category)) +
  geom_bar()

# Compare the average number of installs of free and paid apps ----

# Convert install-count labels such as "1,000+" or "50,000,000+" to numbers
# by stripping the thousands separators and the trailing plus sign.
# Input that is already numeric is returned unchanged, so the section can be
# re-run safely. Labels that are not a number after stripping become NA
# (with R's usual coercion warning).
parse_installs <- function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  as.numeric(gsub("[,+]", "", x))
}

price_free <- google2 %>% filter(Type == "Free")
price_paid <- google2 %>% filter(Type == "Paid")
View(price_free)
# Install labels of the free apps before conversion.
table(price_free$Installs)

# Convert all three tables. Previously price_paid$Installs was left as text,
# which made mean(price_paid$Installs) return NA with a warning.
price_free$Installs <- parse_installs(price_free$Installs)
price_paid$Installs <- parse_installs(price_paid$Installs)
google2$Installs <- parse_installs(google2$Installs)

View(price_free)
table(price_free$Installs)
table(price_paid$Installs)
mean(price_free$Installs)
mean(price_paid$Installs)
table(price_paid$Price)
# NOTE(review): this assumes Price is stored as a number in the workbook;
# if it is text (e.g. "$4.99") the mean is NA - confirm against the data.
mean(price_paid$Price)
View(price_paid)
ggplot(data = price_paid, aes(x = Price, y = Installs)) +
  geom_line()
ggplot(data = price_free, aes(x = Price)) +
  geom_bar()

mean_free <- mean(price_free$Installs)
mean_paid <- mean(price_paid$Installs)

# Columns must be named with `=`. Using `<-` inside data.frame() created
# global variables and left the columns with auto-generated names.
TypeOfDownload <- data.frame(
  Type = c("Free", "Paid"),
  Download = c(mean_free, mean_paid)
)
ggplot(data = TypeOfDownload, aes(x = Type, y = Download)) +
  geom_col() +
  coord_flip()




# Most/least installed apps and a first look at the review counts ----

max(price_free$Installs)

# Ten free apps with the highest install counts.
free_filter <- price_free %>%
  arrange(desc(Installs)) %>%
  head(10)
free_filter

# NOTE(review): this sorts ascending, i.e. it returns the ten paid apps with
# the LOWEST install counts, unlike free_filter above - confirm whether
# desc(Installs) was intended.
paid_filter <- price_paid %>%
  arrange(Installs) %>%
  head(10)
paid_filter

View(google2)
str(google2$Reviews)
# summarise(google2$Reviews) was removed: dplyr::summarise() operates on data
# frames, not on a bare column vector, so the call failed. summary() gives
# the intended overview of the column.
summary(google2$Reviews)
table(google2$Reviews)


# Mean installs of free and paid apps per category ----

# One row per Category/Type combination, sorted by mean installs (descending).
# `.groups = "drop"` returns an ungrouped result instead of leaving the
# Category grouping attached.
Category_install <- google2 %>%
  filter(!is.na(Installs) & !is.na(Category) & !is.na(Type)) %>%
  group_by(Category, Type) %>%
  summarise(mean_install = mean(Installs), .groups = "drop") %>%
  arrange(desc(mean_install))

table(Category_install)
View(Category_install)
table(Category_install$Category)


library(tidyverse)


# Horizontal bar chart of mean installs per category. Columns are referenced
# by bare name inside aes() so ggplot2 looks them up in `data`.
result <- ggplot(
  data = Category_install,
  aes(x = reorder(Category, -mean_install), y = mean_install)
) +
  geom_col() +
  coord_flip()
result + labs(x="장르",y="다운로드",title = "장르별 다운로드 수")


# One vertical segment per Category/Type row, coloured by app type. The
# colour key is computed from the Type column inside aes() instead of being
# taken from a separate vector that had to stay aligned with the rows.
ggplot(data = Category_install) +
  geom_segment(
    aes(
      x = Category,
      xend = Category,
      y = 1,
      yend = mean_install,
      color = ifelse(Type == "Paid", "type1", "type2")
    ),
    size = 1,
    alpha = 0.9
  ) +
  theme_light() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none",
    panel.border = element_blank(),
  ) +
  xlab("") +
  ylab("Value of Y")

#####################
## Mean number of reviews per category ----
str(google2$Reviews)
summary(google2$Reviews)
class(google2$Reviews)
# Make sure the review counts are numeric before averaging them.
google2$Reviews <- as.numeric(google2$Reviews)

# One row per Category/Type combination, sorted by mean reviews (descending).
# `.groups = "drop"` returns an ungrouped result.
Category_review <- google2 %>%
  filter(!is.na(Category) & !is.na(Type) & !is.na(Reviews)) %>%
  group_by(Category, Type) %>%
  summarise(mean_review = mean(Reviews), .groups = "drop") %>%
  arrange(desc(mean_review))

# Horizontal bar chart of mean reviews per category. Columns are referenced
# by bare name inside aes() so ggplot2 looks them up in `data`.
result1 <- ggplot(
  data = Category_review,
  aes(x = reorder(Category, -mean_review), y = mean_review)
) +
  geom_col() +
  coord_flip()
# The title previously contained a duplicated syllable; corrected here.
result1 + labs(x="장르",y="리뷰 수",title = "장르별 리뷰 수")

# Same summary with Type as the first grouping column, for viewing.
Category_review1 <- google2 %>%
  filter(!is.na(Category) & !is.na(Type) & !is.na(Reviews)) %>%
  group_by(Type, Category) %>%
  summarise(mean_review = mean(Reviews), .groups = "drop") %>%
  arrange(desc(mean_review))
View(Category_review1)

#########################################################
# Circle-packing chart of mean installs ----

# Install each dependency only when it is missing.
if (!requireNamespace("packcircles", quietly = TRUE)) {
  install.packages("packcircles")
}
library(packcircles)

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

if (!requireNamespace("viridis", quietly = TRUE)) {
  install.packages("viridis")
}
library(viridis)

# One row per row of Category_install: the category label and its mean
# install count.
data <- data.frame(
  category = Category_install$Category,
  mean = Category_install$mean_install
)

# Generate the layout from the numeric means (the category labels were passed
# here before, which are not sizes). sizetype can be "area" or "radius",
# depending on what should be proportional to the value.
packing <- circleProgressiveLayout(data$mean, sizetype = "area")
# Append the circle centres and radii returned by the layout to `data`.
data <- cbind(data, packing)
# Polygon vertices (50 points per circle) for drawing.
dat.gg <- circleLayoutVertices(packing, npoints = 50)

# One filled circle per row, labelled with its category. The text layer maps
# the columns that actually exist in `data` (`mean` and `category`); the
# previously referenced `value` and `group` columns are not part of it.
# NOTE(review): Category_install has one row per Category/Type pair, so a
# category label can appear on more than one circle - confirm this is wanted.
ggplot() +
  geom_polygon(
    data = dat.gg,
    aes(x, y, group = id, fill = as.factor(id)),
    colour = "black",
    alpha = 0.6
  ) +
  scale_fill_manual(values = magma(nrow(data))) +
  geom_text(data = data, aes(x, y, size = mean, label = category)) +
  scale_size_continuous(range = c(1, 4)) +
  theme_void() +
  theme(legend.position = "none") +
  coord_equal()
